package org.correct.crawler;

import java.io.File;
import java.util.ArrayList;

/**
 * Wires a {@link CrawlManager2} and an {@link AnalyzeManager2} together and
 * starts them on a set of seed URLs.
 *
 * <p>Every constructor builds both managers, makes sure the HTML output
 * directory exists, links the managers to each other and records the seed
 * URLs. Nothing is started until {@link #start()} is called.
 *
 * <p>NOTE(review): the literals {@code 20} (crawl manager) and {@code 20},
 * {@code 1000} (analyze manager) are passed through unchanged; their meaning
 * is defined by the manager classes - confirm there before tuning.
 */
public class Crawler {
	private CrawlManager2 crawlManager;
	private AnalyzeManager2 analyzeManager;
	private String[] seedUrl;

	/**
	 * Creates a crawler with a single seed URL and a single deny-domain value.
	 * No page limit is set on the analyze manager by this constructor.
	 *
	 * @param seedUrl     the only seed URL
	 * @param htmlBase    directory handed to the analyze manager; created if missing
	 * @param allowDomain allow-domain value forwarded to the analyze manager
	 * @param denyDomain  deny-domain value forwarded to the analyze manager
	 */
	public Crawler(String seedUrl, String htmlBase,
			String allowDomain, String denyDomain) {
		crawlManager = new CrawlManager2(20);
		analyzeManager = new AnalyzeManager2(
				htmlBase, 20, 1000, allowDomain, denyDomain);
		checkFileDirectory(htmlBase);

		linkManagers();

		initSeeds(seedUrl);
	}

	/**
	 * Creates a crawler with a single seed URL, several deny-domain values and
	 * a page limit. Delegates to the multi-seed constructor with a one-element
	 * seed array.
	 *
	 * @param seedUrl     the only seed URL
	 * @param htmlBase    directory handed to the analyze manager; created if missing
	 * @param allowDomain allow-domain value forwarded to the analyze manager
	 * @param denyDomain  deny-domain values forwarded to the analyze manager
	 * @param maxPageNum  value passed to {@code AnalyzeManager2.setHtmlNumLimit}
	 */
	public Crawler(String seedUrl, String htmlBase,
			String allowDomain, String[] denyDomain, int maxPageNum) {
		this(new String[] { seedUrl }, htmlBase, allowDomain, denyDomain, maxPageNum);
	}

	/**
	 * Creates a crawler with several seed URLs, several deny-domain values and
	 * a page limit.
	 *
	 * @param seedUrl     seed URLs; the array is copied, so later changes by
	 *                    the caller do not affect this crawler
	 * @param htmlBase    directory handed to the analyze manager; created if missing
	 * @param allowDomain allow-domain value forwarded to the analyze manager
	 * @param denyDomain  deny-domain values forwarded to the analyze manager
	 * @param maxPageNum  value passed to {@code AnalyzeManager2.setHtmlNumLimit}
	 * @throws NullPointerException if {@code seedUrl} is {@code null}
	 */
	public Crawler(String[] seedUrl, String htmlBase,
			String allowDomain, String[] denyDomain, int maxPageNum) {
		crawlManager = new CrawlManager2(20);
		analyzeManager = new AnalyzeManager2(
				htmlBase, 20, 1000, allowDomain, denyDomain);
		checkFileDirectory(htmlBase);

		linkManagers();
		analyzeManager.setHtmlNumLimit(maxPageNum);

		initSeeds(seedUrl);
	}

	/** Gives each manager a reference to the other one. */
	private void linkManagers() {
		crawlManager.setAnalyzeManager(analyzeManager);
		analyzeManager.setCrawlManager(crawlManager);
	}

	/** Stores a single seed URL as a one-element array. */
	private void initSeeds(String url) {
		seedUrl = new String[] { url };
	}

	/** Stores a defensive copy of the given seed URLs. */
	private void initSeeds(String[] url) {
		seedUrl = url.clone();
	}

	/**
	 * Makes sure {@code dir} exists as a directory, creating missing parent
	 * directories as well. A failure is reported on standard error rather than
	 * thrown, so construction still proceeds as it did before.
	 */
	private void checkFileDirectory(String dir) {
		File target = new File(dir);
		if (target.isDirectory()) {
			return;
		}
		// mkdirs() also returns false when another thread or process created
		// the directory in the meantime, hence the second isDirectory() check.
		if (!target.mkdirs() && !target.isDirectory()) {
			System.err.println("Crawler: could not create directory "
					+ target.getAbsolutePath());
		}
	}

	/**
	 * Starts the analyze manager, hands the seed URLs to the crawl manager and
	 * then starts the crawl manager, in that order.
	 */
	public void start() {
		analyzeManager.start();
		crawlManager.addSeedUrls(seedUrl);
		crawlManager.start();
	}

	/**
	 * Sample entry point: crawls a fixed list of seed URLs into the
	 * {@code html} directory with a page limit of 100.
	 */
	public static void main(String[] args) {
		String[] denyDomain = {
			"lib",
			"jxxxfw"
		};
		// NOTE(review): the last seed has no "http://" scheme, unlike the
		// others - confirm that CrawlManager2 accepts scheme-less URLs.
		String[] seedUrl = {
			"http://www.tsinghua.edu.cn/qhdwzy/index.jsp",
			"http://www.tsinghua.edu.cn/eng/index.jsp",
			"http://news.tsinghua.edu.cn/new/index_new.php",
			"http://news.tsinghua.edu.cn/new/eng/index_new.php",
			"info.tsinghua.edu.cn"
		};
		// Alternative single seeds that were commented out in the call below:
		//   http://info.tsinghua.edu.cn/minichan/minichan/minichan/help/minichan/help/help.htm
		//   http://jxxxfw.cic.tsinghua.edu.cn/search.do3?id=159
		//   http://www.tsinghua.edu.cn
		//   http://career.tsinghua.edu.cn/jyw/index.jsp
		Crawler c = new Crawler(
				seedUrl,
				"html",
				"tsinghua",
				denyDomain,
				100);
		c.start();
	}
}
